# -*- coding: utf-8 -*-
# CreateDate : 2021/12/18 01:11
# Author     : 不肖生
# Github     : https://gitee.com/
# EditDate   : 
# sourceProj : wuliu_news
# Description:  处理链接，补全完整url；
#               处理图片src中有诸如700x700时，通过接口写入数据会更改为700<em>/<em>x700，而导致图片无法显示
import re


class DataProcess:
    """Stateless string helpers for cleaning up HTML fragments."""

    @staticmethod
    def complete_url(text: str, with_domain: str) -> str:
        """Turn root-relative ``href``/``src`` values into absolute URLs.

        Only double-quoted values that start with a single ``/`` are
        rewritten. Absolute URLs and protocol-relative URLs (``//host/...``)
        are left untouched.

        :param text: HTML fragment to process.
        :param with_domain: scheme and host to prepend, e.g.
            ``"https://example.com"``; trailing slashes are ignored.
        :return: the fragment with root-relative links completed.
        """
        # Avoid producing "https://example.com//path" when the caller passes
        # a domain that already ends with a slash.
        domain = with_domain.rstrip("/")
        # (?!/) skips protocol-relative URLs such as src="//cdn.host/a.jpg".
        # A callable replacement keeps any backslash in the domain literal.
        return re.sub(
            r'(href|src)="/(?!/)',
            lambda match: f'{match.group(1)}="{domain}/',
            text,
        )

    @staticmethod
    def rm_pic_pixs(text: str) -> str:
        """Strip ``_<width>x<height>`` size suffixes from ``.jpg`` names.

        Handles both a literal ``x`` and its HTML entity ``&#120;`` as the
        separator, e.g. ``pic_700x700.jpg`` -> ``pic.jpg``.

        :param text: text (typically HTML) containing image URLs.
        :return: the text with the size suffixes removed.
        """
        # The dot is escaped so only a real ".jpg" extension is matched.
        text = re.sub(r"_\d+x\d+\.jpg", ".jpg", text)
        text = re.sub(r"_\d+&#120;\d+\.jpg", ".jpg", text)
        return text

    @staticmethod
    def purify_tags(html: str) -> str:
        """Strip tag attributes and drop empty paragraphs.

        * ``section``, ``p``, ``span``, ``div``, ``strong`` and ``br`` tags
          lose all of their attributes.
        * ``a`` tags keep only ``href`` and ``img`` tags keep only ``src``
          (double-quoted values); such tags without that attribute are left
          unchanged.
        * ``<section>`` / ``</section>`` tags become ``<p>`` / ``</p>``.
        * Newlines, ``<p>：</p>``, ``<span></span>``, ``<p><br></p>`` and
          (repeatedly) ``<p></p>`` are removed.

        :param html: HTML fragment to clean.
        :return: the cleaned fragment.
        """
        # [^>]* keeps every match inside one tag (and, unlike ".", also spans
        # attributes that are wrapped over several lines).
        html = re.sub(r"<(section|p|span|div|strong)\s[^>]*>", r"<\1>", html)
        # The first attribute whose name ends in href/src wins (this includes
        # e.g. data-src). The trailing [^>]* may be empty, so a tag that ends
        # right after the value no longer swallows the content following it.
        html = re.sub(r'<a\s[^>]*?href="([^"]*)"[^>]*>', r'<a href="\1">', html)
        html = re.sub(r'<img\s[^>]*?src="([^"]+)"[^>]*>', r'<img src="\1">', html)
        # Normalises <br/>, <br /> and <br attr="..."> alike.
        html = re.sub(r"<br\b[^>]*>", "<br>", html)
        # Rename the section tags only, never the word "section" in visible
        # text or inside URLs.
        html = re.sub(r"<(/?)section>", r"<\1p>", html)

        html = html.replace("<p>：</p>", "")
        html = html.replace("<span></span>", "")
        html = html.replace("<p><br></p>", "")
        html = html.replace("\n", "")

        # Removing an empty paragraph can expose another one around it
        # (<p><p></p></p>), so repeat until none are left.
        while "<p></p>" in html:
            html = html.replace("<p></p>", "")
        return html
